{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "3890aae0",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "# 1 获取数据\n",
    "data = pd.read_csv(\"./data/titanic.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "2ac049db",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\huishi\\AppData\\Local\\Temp\\ipykernel_29616\\3967622678.py:6: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  x[\"age\"].fillna(x[\"age\"].mean(), inplace=True)\n",
      "C:\\Users\\huishi\\AppData\\Local\\Temp\\ipykernel_29616\\3967622678.py:7: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  x[\"embarked\"].fillna(\"Unknown\", inplace=True)\n",
      "C:\\Users\\huishi\\AppData\\Local\\Temp\\ipykernel_29616\\3967622678.py:8: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  x[\"home.dest\"].fillna(\"Unknown\", inplace=True)\n",
      "C:\\Users\\huishi\\AppData\\Local\\Temp\\ipykernel_29616\\3967622678.py:9: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  x[\"room\"].fillna(\"Unknown\", inplace=True)\n",
      "C:\\Users\\huishi\\AppData\\Local\\Temp\\ipykernel_29616\\3967622678.py:10: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  x[\"boat\"].fillna(\"Unknown\", inplace=True)\n"
     ]
    }
   ],
   "source": [
    "# 2数据预处理\n",
    "# 划分特征值和目标值\n",
    "x = data[[\"pclass\", \"age\", \"embarked\", \"home.dest\", \"room\", \"boat\", \"sex\"]]\n",
    "y = data[\"survived\"]\n",
    "# 缺失值处理 home.dest room boat\n",
    "x[\"age\"].fillna(x[\"age\"].mean(), inplace=True)\n",
    "x[\"embarked\"].fillna(\"Unknown\", inplace=True)\n",
    "x[\"home.dest\"].fillna(\"Unknown\", inplace=True)\n",
    "x[\"room\"].fillna(\"Unknown\", inplace=True)\n",
    "x[\"boat\"].fillna(\"Unknown\", inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "e6822431",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pclass        object\n",
       "age          float64\n",
       "embarked      object\n",
       "home.dest     object\n",
       "room          object\n",
       "boat          object\n",
       "sex           object\n",
       "dtype: object"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "50d725a4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pclass</th>\n",
       "      <th>age</th>\n",
       "      <th>embarked</th>\n",
       "      <th>home.dest</th>\n",
       "      <th>room</th>\n",
       "      <th>boat</th>\n",
       "      <th>sex</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1st</td>\n",
       "      <td>29.000000</td>\n",
       "      <td>Southampton</td>\n",
       "      <td>St Louis, MO</td>\n",
       "      <td>B-5</td>\n",
       "      <td>2</td>\n",
       "      <td>female</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1st</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>Southampton</td>\n",
       "      <td>Montreal, PQ / Chesterville, ON</td>\n",
       "      <td>C26</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>female</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1st</td>\n",
       "      <td>30.000000</td>\n",
       "      <td>Southampton</td>\n",
       "      <td>Montreal, PQ / Chesterville, ON</td>\n",
       "      <td>C26</td>\n",
       "      <td>(135)</td>\n",
       "      <td>male</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1st</td>\n",
       "      <td>25.000000</td>\n",
       "      <td>Southampton</td>\n",
       "      <td>Montreal, PQ / Chesterville, ON</td>\n",
       "      <td>C26</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>female</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1st</td>\n",
       "      <td>0.916700</td>\n",
       "      <td>Southampton</td>\n",
       "      <td>Montreal, PQ / Chesterville, ON</td>\n",
       "      <td>C22</td>\n",
       "      <td>11</td>\n",
       "      <td>male</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1308</th>\n",
       "      <td>3rd</td>\n",
       "      <td>31.194181</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>male</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1309</th>\n",
       "      <td>3rd</td>\n",
       "      <td>31.194181</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>male</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1310</th>\n",
       "      <td>3rd</td>\n",
       "      <td>31.194181</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>male</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1311</th>\n",
       "      <td>3rd</td>\n",
       "      <td>31.194181</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>female</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1312</th>\n",
       "      <td>3rd</td>\n",
       "      <td>31.194181</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>male</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1313 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     pclass        age     embarked                        home.dest     room     boat     sex\n",
       "0       1st  29.000000  Southampton                     St Louis, MO      B-5        2  female\n",
       "1       1st   2.000000  Southampton  Montreal, PQ / Chesterville, ON      C26  Unknown  female\n",
       "2       1st  30.000000  Southampton  Montreal, PQ / Chesterville, ON      C26    (135)    male\n",
       "3       1st  25.000000  Southampton  Montreal, PQ / Chesterville, ON      C26  Unknown  female\n",
       "4       1st   0.916700  Southampton  Montreal, PQ / Chesterville, ON      C22       11    male\n",
       "...     ...        ...          ...                              ...      ...      ...     ...\n",
       "1308    3rd  31.194181      Unknown                          Unknown  Unknown  Unknown    male\n",
       "1309    3rd  31.194181      Unknown                          Unknown  Unknown  Unknown    male\n",
       "1310    3rd  31.194181      Unknown                          Unknown  Unknown  Unknown    male\n",
       "1311    3rd  31.194181      Unknown                          Unknown  Unknown  Unknown  female\n",
       "1312    3rd  31.194181      Unknown                          Unknown  Unknown  Unknown    male\n",
       "\n",
       "[1313 rows x 7 columns]"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
